####ReCheck variable coverage


#### Strong lead donors only

rm(list=ls())
library(epicalc)
zap()


library(foreign)
library(MASS)

sar.f <- function(es, x, y, w){
    rho <- es[1]
    s2 <- es[2]^2
    if (s2<0) return(-1e20)
    k <- ncol(x)
    b <- as.matrix(es[3:(k+2)])
    llik <- 0
    nd <- nrow(w)
    n <- nrow(x)
    d <- n/nd
    y <- as.matrix(y)
    
    for(i in 1:d){
        A <- diag(1, nd, nd)-rho*w[,,i]
        dA <- det(A)
        if (dA <=0){
            llik <-  -1e20 
            break
        } else {
            aux <- log(dA) -nd/2 * log(s2)- (1/(2*s2)) * t(A %*% y[((i-1)*nd+1):((i-1)*nd+nd),] - as.matrix(x[((i-1)*nd+1):((i-1)*nd+nd),])%*%b) %*% (A %*% y[((i-1)*nd+1):((i-1)*nd+nd),]- as.matrix(x[((i-1)*nd+1):((i-1)*nd+nd),])%*%b)
            llik <- llik + aux
        }
    }
    return(llik)
}

sar <- function(x, y, w, stval,vars=c(""), routit=100000){
    mod <- optim(stval, sar.f, hessian = TRUE, method = "BFGS", 
            control = list(fnscale = -1, REPORT = 10, 
            reltol = 1e-16, trace = 1, 
            maxit = routit), x=x, y=y, w=w)
    vacov <- as.matrix(solve(-1*mod$hessian, tol=1e-24))
    se <- as.matrix(sqrt(diag(vacov)))
    zstat <- as.matrix(mod$par / se)
    pvalues <- round(2*pnorm(abs(zstat),lower.tail=FALSE),4)
    
    cat("results in order: rho, s, b \n")
    cat("parameter estimates: ", mod$par, "\n")
    cat("standard error: ", se, "\n")
    cat("z-statistics: ", zstat, "\n")
    cat("p-values: ", pvalues, "\n")
    list(llik=mod$value, par=mod$par, se=se, zstat=zstat, 
        pvalues = pvalues, 
        hessian=mod$hessian, vacov=vacov
        )
}


#routines for Clarke test (panel-wise)
sar.clr <- function(pars, x, y, w){
    rho <- pars[1]
    s2 <- pars[2]^2
    k <- ncol(x)
    b <- as.matrix(pars[3:(k+2)])
    nd <- nrow(w)
    n <- nrow(x)
    d <- n/nd
    y <- as.matrix(y)
    llik <- rep(NA, d)
for(i in 1:d){
    A <- diag(1, nd, nd)-rho*w[,,i]
    dA <- det(A)
    llik[i] <- log(dA) -n/2 * log(s2)- (1/(2*s2)) * t(A %*% y[((i-1)*nd+1):((i-1)*nd+nd),] - as.matrix(x[((i-1)*nd+1):((i-1)*nd+nd),])%*%b) %*% (A %*% y[((i-1)*nd+1):((i-1)*nd+nd),]- as.matrix(x[((i-1)*nd+1):((i-1)*nd+nd),])%*%b)    
    }
    return(llik)
}

setwd("C:/Users/Martin Steinwand/Documents/Project Lead Donor/Data")
#setwd("/Users/datalabuser/Documents/Project Lead Donor/Data")

rawdat <- read.dta("DeliveryDyads.4.2.dta")
#ensure data is sorted on r_ccode year d_ccode:
if (rawdat$r_ccode[1]!=rawdat$r_ccode[2]) cat("\n\n WARNING, data not sorted properly!!!\n\n")
if (rawdat$d_ccode[1]==rawdat$d_ccode[2]) cat("\n\n WARNING, data not sorted properly!!!\n\n")
if (rawdat$year[1]!=rawdat$year[2]) cat("\n\n WARNING, data not sorted properly!!!\n\n")

########################
#Strong lead donors only
workdat <- data.frame(rawdat[!rawdat$d_ccode==732 & rawdat$ldstrong==1  & is.na(rawdat$ldstrong)==0
#                        & rawdat$nogovaid==0
                        & is.na(rawdat$pubsector_f)==0 & is.na(rawdat$population)==0
                        & is.na(rawdat$voteshare_f)==0 & is.na(rawdat$rgdpch)==0,])
attach(workdat)

vars <- cbind(pubsector_f, 1, stronglead5of9r2, ODADonYear*10e-2, hhi, OilExp_rf*1e-8, 
        TotImp_rf*1e-8, Camerica,Samerica, Africa, Meast, Asia, 
        Oceania, population*1e-8, rgdpch*1e-2, colown, voteshare_f)

varspop <- cbind(PubsectorPop, 1, stronglead5of9r2, ODADonYear*10e-2, hhi, OilExp_rf*1e-8, 
        TotImp_rf*1e-8, Camerica,Samerica, Africa, Meast, Asia, 
        Oceania, population*1e-8, rgdpch*1e-2, colown, voteshare_f)


#remove missing country dummies
vnames <- c("Aid", "Constant", "Lead Donor", "Total Donor Aid ", "Donor Concentration", 
                                "Oil Exports", "Total Imports", "Central America",
                                 "South America", "Africa", 
                                "Middle East", "Asia","Oceania","Population", 
                                "GDP per capita, ppp","Former Colony", 
                                "Joint UN Voting")

colnames(vars) <- vnames

#get aggregates for 
nvars <- aggregate(vars, list(d_ccode, r_ccode, leadid5of9r2), mean)
nvarspop <- aggregate(varspop, list(d_ccode, r_ccode, leadid5of9r2), mean)
colnames(nvars)[1:3] <- c("d_ccode","r_ccode","leadid5of9r2")
colnames(nvars)[ncol(nvars)] <- c("Joint UN Voting")
y <- nvars[,4]
ypop <- nvarspop[,4]
x <- nvars[,5:ncol(nvars)]

#counts 
n <- nrow(nvars)
nd <- length(table(nvars$d_ccode))
d <- n/nd


#W matrix -- lead donor

#blog diagonal matrix or array
#get aggregated donor list 
dlist <- nvars[1:nd,1]

wall.ld <- array(NA, c(nd, nd, d))
wraw <- matrix(c(rep(.5/(nd-2),nd)),nd, nd, byrow=TRUE)
ldrow <- rep(1/(nd-1), nd)
for (i in 1:d){
    wsub <- wraw
    wsub[dlist==nvars[(1+(i-1)*nd),3],] <- ldrow
    wsub[,dlist==nvars[(1+(i-1)*nd),3]] <- rep(.5, nd)
    diag(wsub) <- 0
    wall.ld[,,i] <- wsub
}    
 


set.seed(021175)
#stv <- runif((ncol(x) + 2))*.01
#avg.ld.wld <- sar(x, y, wall.ld, stval=stv, vars=vnames[2:length(vnames)])
#setwd("C:/Users/Martin Steinwand/Documents/Project Lead Donor/Analysis/R&R.IO")
#save(avg.ld.wld, file="GovAid.strongld.2.2.wld")

set.seed(021175)
#stv <- runif((ncol(x) + 2))*.01
#pop.ld.wld <- sar(x, ypop, wall.ld, stval=stv, vars=vnames[2:length(vnames)])
#setwd("C:/Users/Martin Steinwand/Documents/Project Lead Donor/Analysis/R&R.IO")
#save(pop.ld.wld, file="GovAidpop.strongld.2.2.wld")



#check model convergence, estimate with other starting values
set.seed(072275)
#stv <- runif((ncol(x) + 2))*.1
#avg.ld.wld2 <- sar(x, y, wall.ld, stval=stv, vars=vnames[2:length(vnames)])


#W matrix -- even
#array
wall.ev <- array(NA, c(nd, nd, d))
wraw <- matrix(c(rep(1/(nd-1),nd)),nd, nd, byrow=TRUE)
diag(wraw) <- 0
wall.ev[,,1:d] <- wraw

set.seed(021175)
#stv <- runif((ncol(x) + 2))*.1
#avg.ld.wev <- sar(x, y, wall.ev, stval=stv, vars=vnames[2:length(vnames)])

#setwd("C:/Users/Martin Steinwand/Documents/Project Lead Donor/Analysis/R&R.IO")
#save(avg.ld.wev, file="GovAid.strongld.2.2.wev")

#check model convergence, estimate with other starting values
set.seed(072275)
#stv <- runif((ncol(x) + 2))*.1
#avg.ld.wev2 <- sar(x, y, wall.ev, stval=stv, vars=vnames[2:length(vnames)])


#set.seed(021175)
#stv <- runif((ncol(x) + 2))*.01
#pop.ld.wev <- sar(x, ypop, wall.ev, stval=stv, vars=vnames[2:length(vnames)])
#setwd("C:/Users/Martin Steinwand/Documents/Project Lead Donor/Analysis/R&R.IO")
#save(pop.ld.wev, file="GovAidpop.strongld.2.2.wev")

#W matrix -- geographic

wall.geo <- array(NA, c(nd, nd, d))
ldist <- function(dif, maxdif){ #scaled logit of difference in distance to recipient country
    1/(1+exp(-dif/maxdif))
}
for (j in 1:d){
    for (a in 1:nd){
        raw.row <- ldist(distance[a+(j-1)*nd]-distance[(1+(j-1)*nd):(nd+(j-1)*nd)], max(distance[(1+(j-1)*nd):(nd+(j-1)*nd)]))
        raw.row[a] <- 0 
        wall.geo[a,,j] <- raw.row/sum(raw.row) #row standardize
    }
}

set.seed(021175)
stv <- runif((ncol(x) + 2))*.01
pop.ld.geo <- sar(x, ypop, wall.geo, stval=stv, vars=vnames[2:length(vnames)])
#setwd("C:/Users/Martin Steinwand/Documents/Project Lead Donor/Analysis/R&R.IO")
setwd("/Users/datalabuser/Documents/Project Lead Donor/Analysis/R&R.IO")
save(pop.ld.geo, file="GovAidpop.strongld.2.2.geo")


#####################################
#Clarke test 
llik.ld.wld <- sar.clr(avg.ld.wld$par, x, y, wall.ld)
llik.ld.wev <- sar.clr(avg.ld.wev$par, x, y, wall.ev)
llik.ord <-as.numeric(llik.ld.wev>=llik.ld.wld)#first model is H0
p <- pbinom(sum(llik.ord), d, .5)#p value that 2nd model is better

llik.pop.wld <- sar.clr(pop.ld.wld$par, x, ypop, wall.ld)
llik.pop.wev <- sar.clr(pop.ld.wev$par, x, ypop, wall.ev)
llik.ord <-as.numeric(llik.pop.wev>=llik.pop.wld)#first model is H0
p <- pbinom(sum(llik.ord), d, .5)#p value that 2nd model is better

clarke <- function(mod1,mod2,w1,w2, x, y){
    llik1 <- sar.clr(mod1$par, x, y, w1)
    llik2 <- sar.clr(mod2$par, x, y, w2)
    llik.ord <-as.numeric(llik2>=llik1)
    pbinom(sum(llik.ord), d, .5)
}

#clarke(avg.ld.wld, avg.ld.wev, wall.ld, wall.ev, x, ypop)
p <- clarke(pop.ld.wev, pop.ld.wld, wall.ev, wall.ld, x, ypop)
cat("\nWith p<",p," model 1 has better fit than model 2\n")


#####################################
# LL Ratio Test of Spatial versus non-spatial spec
lr.test <- function(restricted, full){
    dgf <- abs(length(restricted$par)-length(full$par))
    r <- -2 * (restricted$llik - full$llik)
    p <- 1-pchisq(r, dgf)
    cat("degrees of freedom: ", dgf, "\n")
    cat("likelihood ratio statistic: ", r,"\n")
    cat("p value: ", p, " (under H0 that the restricted model is correct)\n\n")
    return(list(r = r, p = p))
}

    #get ols estimates
    
X<- as.matrix(x)
olspar <- solve(t(X)%*%X)%*%t(X)%*%y    
olssig2 <- 1/(nrow(X)-2) * t(y-X%*%olspar)%*%(y-X%*%olspar)
olsllik <- sum(log(dnorm(y, X%*%olspar, sqrt(olssig2))))
olsmod <- list(par=olspar, llik=olsllik)

lr.test(olsmod, avg.ld.wld)
